# TRADE DATA SET UP ----------------------------------------------------------------------
# Run once the first time you open it, and never again unless you delete the files
# trade flow data url
URL <- "http://www.cepii.fr/DATA_DOWNLOAD/baci/trade_flows/BACI_HS17_V202001.zip"
# create a file on the user's desktop
download.file(url = URL, destfile = "~/Desktop/trade_flows.zip")
# DATA READ IN ---------------------------------------------------------------------------
# unzip the file and save the .csv to the user's desktop
tradeflows <- unzip("~/Desktop/trade_flows.zip", exdir = "~/Desktop")
# read in trade flow data
trade_flow <- read_csv(tradeflows)
## Parsed with column specification:
## cols(
## t = col_double(),
## i = col_double(),
## j = col_double(),
## k = col_character(),
## v = col_double(),
## q = col_double()
## )
# read in country code data
country_codes <- read.csv("../data sets/country_codes.csv")
# read in product code data
product_codes <- read.csv("../data sets/product_codes.csv")
# DATA WRANGLING -------------------------------------------------------------------------
# remove extraneous variables
countries <- country_codes %>%
select(country_code, country_name_full)
# recode levels to match with world map later (not entirely complete, add more if noticed)
countries$country_name_full <- recode(factor(countries$country_name_full),
"USA, Puerto Rico and US Virgin Islands" = "USA",
"Plurinational State of Bolivia" = "Bolivia",
"Russian Federation" = "Russia",
"France, Monaco" = "France",
"Southern African Customs Union" = "South Africa",
"Viet Nam" = "Vietnam",
"United Republic of Tanzania" = "Tanzania",
"United Kingdom" = "UK",
"Norway, Svalbard and Jan Mayen" = "Norway")
# master data set with exports and imports (might be useful for networks)
trade_2018 <- trade_flow %>%
# make variable names descriptive
rename(year = t, product = k, exporter_code = i, importer_code = j, value = v,
quantity = q) %>%
# year is always 2018 - unnecessary
select(-year) %>%
# add exporter country names
left_join(countries, by = c("exporter_code" = "country_code")) %>%
rename(exporter_name = country_name_full) %>%
# add importer country names
left_join(countries, by = c("importer_code" = "country_code")) %>%
rename(importer_name = country_name_full)
# separate data set for exports
trade_exports <- trade_2018 %>%
select(c(product, value, quantity, exporter_code, exporter_name))
# separate data set for imports
trade_imports <- trade_2018 %>%
select(c(product, value, quantity, importer_code, importer_name))
# proportion data sets
whole_value <- sum(trade_2018$value)
export_prop <- trade_exports %>%
group_by(exporter_name) %>%
summarize(export_pct = 100*(sum(value)/whole_value))
## `summarise()` ungrouping output (override with `.groups` argument)
import_prop <- trade_imports %>%
group_by(importer_name) %>%
summarize(import_pct = 100*(sum(value)/whole_value))
## `summarise()` ungrouping output (override with `.groups` argument)
# MAP SETUP ------------------------------------------------------------------------------
world <- map("world", fill = TRUE, plot = FALSE)
world$names <- str_replace(world$names, pattern = ":.*$", replacement = "")
# EXPORT MAP -----------------------------------------------------------------------------
export_pct_named <- export_prop$export_pct
names(export_pct_named) <- export_prop$exporter_name
world$export_pct <- export_pct_named[world$names]
mypalette <- colorNumeric(palette = "viridis", domain = world$export_pct,
na.color = "transparent")
leaflet(world) %>%
addTiles() %>%
setView(lat=10, lng=0 , zoom=2) %>%
addPolygons(color = "white", fillColor = ~mypalette(export_pct), weight = 1) %>%
addLegend(pal = mypalette, values = ~export_pct,
title = "Export Percent", position = "bottomleft")
# IMPORT MAP -----------------------------------------------------------------------------
import_pct_named <- import_prop$import_pct
names(import_pct_named) <- import_prop$importer_name
world$import_pct <- import_pct_named[world$names]
mypalette <- colorNumeric(palette = "viridis", domain = world$import_pct,
na.color = "transparent")
leaflet(world) %>%
addTiles() %>%
setView(lat=10, lng=0 , zoom=2) %>%
addPolygons(color = "white", fillColor = ~mypalette(import_pct), weight = 1) %>%
addLegend(pal = mypalette, values = ~import_pct,
title = "Import Percent", position = "bottomleft")
# Combine export + import data for clustering
trade_combine <- export_prop %>%
inner_join(import_prop, by = c("exporter_name" = "importer_name")) %>%
rename(country = exporter_name)
# Testing optimal number of clusters
fig <- matrix(NA, nrow=10, ncol=2)
set.seed(75)
for (i in 1:10){
fig[i,1] <- i
fig[i,2] <- kmeans(trade_combine[,2:3]
, centers=i
, nstart=20)$tot.withinss
}
ggplot(data = as.data.frame(fig), aes(x = V1, y = V2)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks=c(1:10)) +
labs(x = "K", y = expression("Total W"[k]))
ggplot(data = as.data.frame(fig[2:10,]), aes(x = V1, y = V2)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks=c(1:10)) +
labs(x = "K", y = expression("Total W"[k]))
# Seems like 3 clusters are best
# Use k-means clustering to identify 3 clusters based on contribution in terms of export and import to world trade
set.seed(100)
vars <- c("export_pct" , "import_pct")
km_trade <- kmeans(trade_combine[,vars], centers=3, nstart=20)
trade_combine_clust3 <- trade_combine %>%
mutate(clust3 = as.character(km_trade$cluster))
# visualize the cluster assignments and centroids
ggplot(data = trade_combine_clust3, aes(x = import_pct, y = export_pct)) +
geom_point(aes(color = clust3)) +
coord_fixed() +
geom_point(data = as.data.frame(km_trade$centers)
, aes(x = import_pct, y = export_pct)
, pch = "X"
, size = 4) +
labs(x = "Import percentage of world import"
, y = "Export percentage of world export"
, color = "Cluster Assignment")
# Getting trade between countries
trade_network <- trade_2018 %>%
select(value, exporter_name, importer_name) %>%
group_by(exporter_name, importer_name) %>%
summarize(total_trade = sum(value))
## `summarise()` regrouping output by 'exporter_name' (override with `.groups` argument)
# Create network graph (currently too crowded)
trade_network_data <- graph_from_data_frame(trade_network
, directed = TRUE)
summary(trade_network_data)
## IGRAPH 7bb9475 DN-- 221 23805 --
## + attr: name (v/c), total_trade (e/n)
trade_network_graph <- ggnetwork(trade_network_data)
ggplot(data = trade_network_graph
, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(arrow=arrow(type="closed", length=unit(6,"pt"))
, color = "lightgray") +
geom_nodes() +
geom_nodelabel(aes(label = name)) +
theme_blank()
# Matching with clusters for better information
trade_network_clust <- trade_network %>%
left_join(trade_combine_clust3, by = c("exporter_name" = "country")) %>%
rename(clust_ex = clust3) %>%
select(exporter_name, importer_name, total_trade, clust_ex) %>%
left_join(trade_combine_clust3, by = c("importer_name" = "country")) %>%
rename(clust_im = clust3) %>%
select(exporter_name, importer_name, total_trade, clust_ex, clust_im)
# See network within cluster 2
clust_2_network <- trade_network_clust %>%
filter(clust_ex == 2) %>%
filter(clust_im == 2)
clust_2_network_data <- graph_from_data_frame(clust_2_network
, directed = TRUE)
clust_2_network_graph <- ggnetwork(clust_2_network_data)
ggplot(data = clust_2_network_graph
, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(aes(color = total_trade), arrow = arrow(type = "closed", length = unit(6,"pt")),
curvature = 0.1, size = 0.8) +
geom_nodes() +
geom_nodelabel(aes(label = name)) +
theme_blank() +
labs(title = "Network for countries in cluster 2",
color = "2017 trade volume") +
scale_color_continuous(type = "viridis")
# See network within cluster 3 (not super useful bc there are too many countries within this)
clust_3_network <- trade_network_clust %>%
filter(clust_ex == 3) %>%
filter(clust_im == 3)
clust_3_network_data <- graph_from_data_frame(clust_3_network
, directed = TRUE)
clust_3_network_graph <- ggnetwork(clust_3_network_data)
ggplot(data = clust_3_network_graph
, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(aes(color = total_trade), arrow = arrow(type = "closed", length = unit(6,"pt")),
curvature = 0.1, size = 0.8) +
geom_nodes() +
geom_nodelabel(aes(label = name)) +
theme_blank() +
labs(title = "Network for countries in cluster 3",
color = "2017 trade volume") +
scale_color_continuous(type = "viridis")
## Trying Out Some Network Graphs
# function for an interactive network
# thickness corresponds to the volume of trade
# you can pull the nodes towards the edge to better see the graph
# click on a node to highlight its exports
make_network <- function(network) {
c.factor <- function(..., recursive=TRUE) unlist(list(...), recursive=recursive)
name_vec <- name_vec <- c.factor(unique(c.factor(unique(network$exporter_name),
unique(network$importer_name))))
from <- network$exporter_name
to <- network$importer_name
trade <- network$total_trade
edges <- data.frame(from = from, to = to, value = trade)
nodes <- data.frame(id = name_vec, label = name_vec, group = name_vec)
visNetwork(nodes, edges, height = "1000px", width = "100%") %>%
visNodes(shape = "dot") %>%
visEdges(arrows = "to", arrowStrikethrough = F, smooth = T, physics = F) %>%
visOptions(highlightNearest = list(enabled = T, degree = 0)) %>%
visLayout(randomSeed = 17)
}
make_network(clust_2_network)
make_network(clust_3_network)
# more generic graphs but still interactive
visIgraph(clust_2_network_data)
visIgraph(clust_3_network_data)
# read in world map data from the maps package
world_map <- map_data(map = "world", region = ".")
#colored graph for exported percentages --------------------------------------------------
# get the data ready
export_prop %>%
rename(region = exporter_name) %>%
inner_join(world_map, by = "region") %>%
# create the plot
ggplot(aes(x = long, y = lat, group = group, fill = export_pct)) +
geom_polygon(color = "black", alpha = 0.8) +
theme_void() +
coord_fixed(ratio = 1.3) +
labs(title = "Each country's contribution to total worldwide export in percentages",
fill = "Export Percentage: ",
caption = "*Countries in white have no data*") +
theme(legend.position = "bottom") +
scale_fill_viridis(option = "viridis", direction = -1)
# colored graph for imported percentages -------------------------------------------------
# get the data ready
import_data <- import_prop %>%
rename(region = importer_name) %>%
inner_join(world_map, import_prop, by = "region")
# create the plot
ggplot(import_data, aes(x = long, y = lat, group = group, fill = import_pct)) +
geom_polygon(color = "black", alpha = 0.8) +
theme_void() +
coord_fixed(ratio = 1.3) +
labs(title = "Each country's contribution to total worldwide import in percentages",
fill = "Import Percentage: ",
caption = "*Countries in white have no data*") +
theme(legend.position="bottom") +
scale_fill_viridis(option = "plasma", direction = -1)
# stuff to work on -----------------------------------------------------------------------
#try for arrows, but work on this weekend
#tradedata2018_both_direct <- graph_from_edgelist(as.matrix(tradedata2018_both[,3:4]),
# directed = TRUE)
#
#ggplot(data = ggnetwork(tradedata2018_both_direct), aes(x = x, y = y, xend = xend,
#yend = yend)) +
#geom_edges(arrow=arrow(type="closed", #length=unit(6,"pt"))
# , color = "lightgray") + geom_nodes() + geom_nodelabel(aes(label = name)) +
#theme_blank()